The Legacy Of Redlining On Chicago's Lending Patterns and Demographics

By: Vedika Ahuja

Setup

In [0]:
from google.colab import drive
from google.colab import files
In [0]:
drive.mount('/content/drive', force_remount = True)
In [0]:
!pip install geopandas
In [0]:
!pip install descartes
In [0]:
!pip install geos
In [0]:
!pip install shapely
In [0]:
!apt install python3-rtree
In [0]:
!pip install geos
In [0]:
!pip install ptitprince
In [0]:
!pip install mpld3
In [0]:
import rtree
import descartes
import plotly.express as px
import pandas as pd
import numpy as np
import altair as alt
import vega_datasets as vega_data
import json
import geopandas as gpd
import regex as re
import shapely
import geos
import seaborn as sns
import os
import matplotlib.pyplot as plt
#sns.set(style="darkgrid")
#sns.set(style="whitegrid")
#sns.set_style("white")
sns.set(style="whitegrid",font_scale=2)
import matplotlib.collections as clt
import mpld3

import ptitprince as pt
In [0]:
alt.data_transformers.disable_max_rows()
In [0]:
cd 'drive/My Drive/data_viz'

Theme

In [149]:
# Shared styling constants referenced by the Altair theme below.
markColor = 'black'
axisColor = 'black'
backgroundColor = 'grey'
font = 'Gotham'
labelFont = 'Gotham'
sourceFont = 'Gotham'
gridColor = 'black'
titleFontSize = 20
subtitleFontSize = 15
# Four-colour palettes; holc_scheme is wired into the theme's ordinal range below.
holc_scheme = ["#86B404","#045FB4", "#F3F781", "#DF3A01"]
redlining_pal = ["#FFFFB2", "#FECC5C", "#FD8D3C", "#E31A1C"]

def vedika_theme():
  """Return the Altair theme dictionary used for every chart in this notebook.

  The returned dict has a single top-level ``'config'`` key holding the
  view, title, axis, legend and colour-range defaults.
  """
  return {
          'config': {
              # A dict literal keeps only the LAST occurrence of a repeated key,
              # so the default size and the "no border" setting must live in one
              # "view" entry; previously the size was silently discarded.
              'view': {
                  'height': 500,
                  'width': 500,
                  'strokeWidth': 0,  # removes the box drawn around the plotting area
              },
              "title": {
                "anchor": 'start',
                "fontSize": titleFontSize,
                "font": font,
                "subtitleFont" : font,
                "subtitleFontSize" : subtitleFontSize,
                # NOTE(review): confirm "titlePadding" is a recognised title-config property.
                "titlePadding" : 20
              },

              "axisX": {
                "domain": True,
                "domainColor": axisColor,
                "domainWidth": 1,
                "grid": False,
                "labelFontSize": 12,
                "labelFont": labelFont,
                "labelAngle": 0,
                "tickColor": axisColor,
                "tickSize": 5,
                "titleFontSize": 12,
                "titlePadding": 10,
                "titleFont": font
              },

              "axisY": {
                "domain": False,
                "domainWidth": 1,
                # "grid" used to appear twice (True, then False); False was the
                # value that took effect, so only that one is kept.
                "grid": False,
                "gridColor": gridColor,
                "gridWidth": 1,
                "labelFontSize": 12,
                "labelFont": labelFont,
                "labelPadding": 8,
                "ticks": False,
                "titleFontSize": 12,
                "titlePadding": 10,
                "titleFont": font,
              },

              "legend": {
                "labelFont": labelFont,
                "labelFontSize": 12,
                "symbolType": "circle",
                "symbolSize": 200,
                "titleFont": font,
                "titleFontSize": 12,
                "orient": "right",
                "offset": 20,
              },

              "range": {
                "category": {"scheme" : 'tableau10'},
                "diverging": {"scheme" : 'yelloworangered'},
                # NOTE(review): 'diverging-colors' does not look like a built-in
                # Vega scheme name - confirm or replace.
                "heatmap": {"scheme":'diverging-colors'},
                # Use the palette list itself; the string 'holc_scheme' is not a
                # registered scheme name.
                "ordinal": holc_scheme,
              },
          },
      }

# register the custom theme under a chosen name
alt.themes.register('vedika_theme', vedika_theme)
# enable the newly registered theme
alt.themes.enable('vedika_theme')
Out[149]:
ThemeRegistry.enable('vedika_theme')

Load, Clean, and Explore Data

In [0]:
def open_geojson(geo_json_file_loc):
    """Read the JSON file at ``geo_json_file_loc`` and return the parsed object."""
    with open(geo_json_file_loc) as handle:
        return json.load(handle)

def get_gpd_df(file_location, use_shape_file=False):
    """Load a geographic file into a GeoDataFrame.

    With ``use_shape_file=True`` the path is handed straight to
    ``gpd.read_file``; otherwise the file is parsed as JSON and its features
    are converted with ``GeoDataFrame.from_features``.
    """
    if not use_shape_file:
        parsed_features = open_geojson(file_location)
        return gpd.GeoDataFrame.from_features((parsed_features))
    return gpd.read_file(file_location)
In [0]:
# Paths to the boundary GeoJSON files (census tracts and community areas).
geo_json_file_loc = "data/raw/Boundaries - Census Tracts - 2010.geojson"
geo_json_community_areas = "data/raw/Boundaries - Community Areas (current).geojson"

#load data
# HMDA Illinois extracts for 2007 and 2017.
hmda_2007 = pd.read_csv("data/raw/hmda_2007_il_first-lien-owner-occupied-1-4-family-records_labels.csv")
hmda_2017 = pd.read_csv("data/raw/hmda_2017_il_first-lien-owner-occupied-1-4-family-records_labels.csv")
# NOTE: chi_census_tracts is reassigned later in the notebook from the tract GeoJSON.
chi_census_tracts = pd.read_csv("data/raw/CensusTractsTIGER2010.csv")
chi_community_areas = pd.read_csv("data/raw/Census_Data_-_Selected_socioeconomic_indicators_in_Chicago__2008___2012.csv")
# Same path as geo_json_file_loc above, kept under a second name.
chi_census_geojson = "data/raw/Boundaries - Census Tracts - 2010.geojson"
# Community-area polygons, loaded through the GeoJSON branch of get_gpd_df.
comm_area_gpd = get_gpd_df(geo_json_community_areas)
In [0]:
def fix_census_tract_num(df):
  """Add a ``census_tract_number`` column derived from ``GEO.id2``.

  The last six characters of ``GEO.id2`` are split as ``XXXX.YY``; a trailing
  ``.0`` / ``.00`` suffix is removed and leading zeros are stripped, e.g.
  ``'17031010100' -> '101'`` and ``'17031010201' -> '102.01'``.

  The column is written onto ``df`` itself and ``df`` is returned.
  """
  geo_id = df['GEO.id2']
  tract = geo_id.str[-6:-2] + "." + geo_id.str[-2:]
  # Build the value on a standalone Series and assign it once. The previous
  # `df[col].replace(..., inplace=True)` form edits a temporary object and is
  # silently ignored when pandas Copy-on-Write is active.
  tract = tract.replace(r'\.0$', '', regex=True)
  tract = tract.replace(r'\.00$', '', regex=True)
  df['census_tract_number'] = tract.str.lstrip('0')

  return df

Census Preprocessing

In [0]:
# 2017 ACS DP05 table; [1:] drops the first row of the file
# (presumably the annotation row that follows the header - TODO confirm).
acs_demos_2017 = pd.read_csv("data/raw/ACS_17_5YR_DP05_with_ann.csv")[1:]

# New integer column name -> source ACS column code.
acs_2017_sources = {
    'tot_pop_2017': 'HC01_VC03',
    'white_pop_2017': 'HC01_VC99',
    'black_pop_2017': "HC01_VC100",
    'asian_pop_2017': 'HC01_VC102',
    'AIAN_pop_2017': 'HC01_VC101',
    'not_hispanic': "HC01_VC98",
}
for target_col, acs_code in acs_2017_sources.items():
    acs_demos_2017[target_col] = acs_demos_2017[acs_code].astype("int")

# Hispanic and POC counts are derived by subtraction from the total.
acs_demos_2017['hispanic_pop_2017'] = acs_demos_2017['tot_pop_2017'].sub(acs_demos_2017['not_hispanic'])
acs_demos_2017 = fix_census_tract_num(acs_demos_2017)
acs_demos_2017['poc_pop_2017'] = acs_demos_2017['tot_pop_2017'].sub(acs_demos_2017['white_pop_2017'])
acs_demos_2017['poc_perc_2017'] = acs_demos_2017['poc_pop_2017'].div(acs_demos_2017['tot_pop_2017'])

acs_2017_keep = ["census_tract_number","poc_perc_2017", "poc_pop_2017", "tot_pop_2017", 'white_pop_2017',
                 'black_pop_2017', 'AIAN_pop_2017', 'asian_pop_2017', 'hispanic_pop_2017']
acs_demos_2017 = acs_demos_2017[acs_2017_keep]
In [0]:
# Alternative POC count: explicit sum of the four non-white groups.
acs_demos_2017['poc_alt'] = (
    acs_demos_2017['black_pop_2017']
    + acs_demos_2017['asian_pop_2017']
    + acs_demos_2017['AIAN_pop_2017']
    + acs_demos_2017['hispanic_pop_2017']
)
acs_demos_2017['poc_perc_alt'] = acs_demos_2017['poc_alt'].div(acs_demos_2017['tot_pop_2017'])
In [0]:
# 2010 ACS DP05 table; [1:] drops the first row of the file
# (presumably the annotation row that follows the header - TODO confirm).
acs_demos_2010 = fix_census_tract_num(pd.read_csv("data/raw/ACS_10_5YR_DP05_with_ann.csv")[1:])

# New integer column name -> source ACS column code.
acs_2010_sources = {
    'tot_pop_2010': 'HC01_VC03',
    'white_pop_2010': 'HC01_VC88',
    'black_pop_2010': 'HC01_VC89',
    'AIAN_pop_2010': 'HC01_VC90',
    'asian_pop_2010': 'HC01_VC91',
    'not_hispanic': 'HC01_VC87',
}
for target_col, acs_code in acs_2010_sources.items():
    acs_demos_2010[target_col] = acs_demos_2010[acs_code].astype('int')

acs_demos_2010['hispanic_pop_2010'] = acs_demos_2010['tot_pop_2010'].sub(acs_demos_2010['not_hispanic'])
# The two POC columns are computed here but not included in the selection below.
acs_demos_2010['poc_pop_2010'] = acs_demos_2010['tot_pop_2010'].sub(acs_demos_2010['white_pop_2010'])
acs_demos_2010['poc_perc_2010'] = acs_demos_2010['poc_pop_2010'].div(acs_demos_2010['tot_pop_2010'])

acs_2010_keep = ["census_tract_number", "tot_pop_2010", 'white_pop_2010', 'black_pop_2010',
                 'AIAN_pop_2010', 'asian_pop_2010', 'hispanic_pop_2010']
acs_demos_2010 = acs_demos_2010[acs_2010_keep]
In [0]:
#download 2017 median income data
# 2017 ACS S1903 (median income) table, keyed by the normalised tract number.
acs_median_income_2017 = fix_census_tract_num(
    pd.read_csv("data/raw/ACS_17_5YR_S1903_with_ann.csv")[1:]
)
# Expose the HC03_EST_VC02 column under a readable name.
acs_median_income_2017['median_income_tract_2017'] = acs_median_income_2017['HC03_EST_VC02']
In [0]:
# Keep only the join key and the income column.
acs_median_income_2017 = acs_median_income_2017[['census_tract_number', 'median_income_tract_2017']]
In [0]:
#2000 census - come back to this
# 2000 decennial census table (SF1 QT-P3); [1:] drops the first row of the file
# (presumably the annotation row - TODO confirm).
census_2000 = pd.read_csv('data/raw/DEC_00_SF1_QTP3_with_ann.csv')[1:]
#HC01_VC39 - Number; HISPANIC OR LATINO - Total population - Not Hispanic or Latino
#HC01_VC04 - Number; RACE - Total population - One race - White
#census_2000 = pd.read_csv('data/raw/DEC_00_SF1_QTP3_with_ann.csv')[1:]
In [0]:
# Adds the normalised census_tract_number column derived from GEO.id2.
census_2000 = fix_census_tract_num(census_2000)
In [0]:
# New integer column name -> source census column code.
census_2000_sources = {
    'total': "HC01_VC02",
    'hispanic': "HC01_VC34",
    'black': "HC01_VC05",
    'white': "HC01_VC04",
    'asian': "HC01_VC11",
    'AIAN': "HC01_VC20",
}
for target_col, census_code in census_2000_sources.items():
    census_2000[target_col] = census_2000[census_code].astype('int')
In [0]:
# Restrict to tracts 8140, 3302 and 3301 (the same list is used for race_south_side later on).
census_2000_south = census_2000[census_2000['census_tract_number'].isin(["8140", "3302", "3301"])]
In [0]:
# Keep only the integer population columns so the sum below is well defined.
census_2000_south = census_2000_south[['total', 'hispanic', 'black','white','asian', 'AIAN'  ]]
In [0]:
# Constant key so that a groupby collapses every row into one total.
census_2000_south['ind'] = 1
In [0]:
# Single-row frame of totals across the selected tracts.
census_2000_south = census_2000_south.groupby('ind').sum().reset_index()
In [0]:
# Tract subset matching the list used for race_washington later on.
census_2000_wp = census_2000[census_2000['census_tract_number'].isin(["8345", "8361", "4003", "4004", "4005",  "4008"])]

HMDA Preprocessing

In [0]:
#census tract map prep
geo_json_file_loc = "data/raw/Boundaries - Census Tracts - 2010.geojson"
census_df = get_gpd_df(chi_census_geojson)
#census_df = gpd.GeoDataFrame.from_features((census_json)) - I think I can get rid of this.
#save geojson as json 
with open(geo_json_file_loc) as json_data:
        census_json = json.load(json_data)

chi_census_tracts = census_df.rename({'name10' : 'census_tract_number'}, axis='columns')
chi_census_tracts['census_tract_number'] = chi_census_tracts['census_tract_number'].astype('str')
In [0]:
#Create hmda dataset
hmda_all = hmda_2007.append(hmda_2017)
hmda_all = hmda_all[hmda_all['loan_purpose_name'] == 'Home purchase']
hmda_all['census_tract_number'] = hmda_all['census_tract_number'].astype('str')
hmda_all = hmda_all[hmda_all['county_name'] == "Cook County"]
#if census tract ends in .0, get rid of .0
hmda_all['census_tract_number'].replace('\.0$', '', regex=True, inplace=True)
In [0]:
# Applicants flagged Hispanic or Latino are labelled 'Latino'; everyone else
# keeps their first reported race.
is_latino = hmda_all['applicant_ethnicity_name'] == 'Hispanic or Latino'
hmda_all['race_ethnicity'] = np.where(is_latino, 'Latino', hmda_all['applicant_race_name_1'])

# Collapse both "no usable answer" categories into the string 'NA'.
unreported_labels = [
    'Information not provided by applicant in mail, Internet, or telephone application',
    'Not applicable',
]
hmda_all.loc[hmda_all['race_ethnicity'].isin(unreported_labels), 'race_ethnicity'] = 'NA'
In [0]:
#population by census tract
pop_over_time = hmda_all.groupby(['census_tract_number', "as_of_year"]).agg({'population' : 'mean'}).reset_index()
pop_over_time['year'] = np.where(pop_over_time['as_of_year'] == 2007, "year_2007", "year_2017")
pop_over_time = pop_over_time.pivot(index='census_tract_number', columns='year', values='population').reset_index()
pop_over_time = pop_over_time.rename({"year_2007": "pop_2007", "year_2017": "pop_2017"}, axis="columns")
In [0]:
# Mean 'minority_population' per tract and year, divided by 100 and pivoted
# to one row per tract.
pop_minority = (
    hmda_all
    .groupby(['census_tract_number', "as_of_year"])
    .agg({'minority_population' : 'mean'})
    .reset_index()
)
pop_minority['minority_population'] = pop_minority['minority_population'].div(100)
pop_minority['year'] = np.where(pop_minority['as_of_year'] == 2007, "year_2007", "year_2017")
pop_minority = (
    pop_minority
    .pivot(index='census_tract_number', columns='year', values='minority_population')
    .reset_index()
    .rename({"year_2007": "pop_min_2007", "year_2017": "pop_min_2017"}, axis="columns")
)
In [0]:
# Aggregate HMDA Data by census tract, and clean
## count by census tract
chi_census_loan_counts = hmda_all.groupby(['census_tract_number', "as_of_year"]).agg({'respondent_id':'count',
                                                                                      'population' : 'mean'}).reset_index()
chi_census_loan_counts['year'] = np.where(chi_census_loan_counts['as_of_year'] == 2007, "year_2007", "year_2017")
chi_census_loan_counts = chi_census_loan_counts.pivot(index='census_tract_number', columns='year', values='respondent_id').reset_index()
chi_census_loan_counts = chi_census_loan_counts[chi_census_loan_counts['census_tract_number'] != 'nan']
chi_census_loan_counts['year_2007'].fillna(value=0, inplace=True)
chi_census_loan_counts['year_2017'].fillna(value=0, inplace=True)
chi_census_loan_counts = chi_census_loan_counts.rename({"year_2007": "loans_2007", "year_2017": "loans_2017"}, axis="columns")
In [0]:
#merge census level data
census_to_counts = chi_census_tracts.merge(chi_census_loan_counts, on="census_tract_number", how="left", indicator = "True")
census_to_counts = census_to_counts.merge(pop_over_time, on="census_tract_number", how="left")
census_to_counts = census_to_counts.merge(pop_minority, on="census_tract_number", how="left")
In [0]:
## add some loan measures
census_to_counts['loans_per_100_ppl_2017'] = (census_to_counts['loans_2017']/census_to_counts['pop_2017'])*100
census_to_counts['loans_per_100_ppl_2007'] = (census_to_counts['loans_2007']/census_to_counts['pop_2007'])*100
In [0]:
# Attach the 2010 and 2017 ACS demographics; each merge records its match
# status in its own indicator column.
census_to_counts = (
    census_to_counts
    .merge(acs_demos_2010, on='census_tract_number', indicator='merge_2010', how='left')
    .merge(acs_demos_2017,  on='census_tract_number', indicator='merge_2017', how='left')
)
In [0]:
# Mapping frame: tract-level counts plus 2017 median income (left join keeps every tract).
census_to_counts_map = census_to_counts.merge(acs_median_income_2017,  on='census_tract_number', indicator='merge_income_2017', how='left')
In [0]:
# 2010 -> 2017 change in total and Black population per tract (ACS estimates).
census_to_counts_map['change_in_pop'] = census_to_counts_map['tot_pop_2017'].sub(census_to_counts_map['tot_pop_2010'])
census_to_counts_map['change_in_black_pop'] = census_to_counts_map['black_pop_2017'].sub(census_to_counts_map['black_pop_2010'])
In [0]:
#census_to_counts_for_map = census_to_counts[['geometry', 'census_tract_number', 'year_2007', 'year_2017']]
In [0]:
#for now drop these - I need to figure out how to adjust the scale of the maps to fix this
#These census tracts are in the loop - huge condos have come up
#census_to_counts_for_map['year_2017'] = np.where(census_to_counts_for_map['year_2017'] >300, 300, census_to_counts_for_map['year_2017'])
#census_to_counts_for_map['year_2007'] = np.where(census_to_counts_for_map['year_2007'] >300, 300, census_to_counts_for_map['year_2017'])
In [0]:
#median income 2017 by census tract polgyons
#tract_median_2017 = hmda_all[hmda_all['as_of_year'] == 2017]
#tract_median_2017['tract_median_income'] = tract_median_2017['tract_to_msamd_income']*tract_median_2017['hud_median_family_income']/100
#census_income = tract_median_2017.groupby('census_tract_number').agg({'tract_median_income' : 'mean'}).reset_index()
#census_to_counts_map = census_to_counts.merge(census_income, on = "census_tract_number", how="left")
In [0]:
# Serialise the tract frame to GeoJSON and pass its feature list to Altair as inline data.
count_json = json.loads(census_to_counts_map.to_json())
count_data = alt.Data(values=count_json['features'])

Introduction

For the majority of the 20th century, financial institutions denied communities of color, especially Black communities, affordable home loans through a practice called redlining. The Federal government sanctioned and supported this systematic denial of home mortgages starting in the 1930s, when it created the Home Owners Loan Corporation (HOLC) to insure private mortgages. The HOLC created maps of major American cities to document the stability and the risks associated with lending by neighborhood.

Loan officers, appraisers and real estate professionals used the HOLC maps to decide where to provide loans and the terms of the loans. The HOLC maps documented Chicago neighborhoods in great detail, labeling each neighborhood on a riskiness scale from “A”, “best”, to “D”, “hazardous”, and providing a qualitative description of each neighborhood. The grading and descriptions are based largely on the racial and ethnic make-up, changing demographics, and housing conditions and access to public facilities in each neighborhood. Neighborhoods with high percentages of people of color, especially black folks, were coined hazardous, regardless of other factors.

These HOLC maps in Chicago not only provided guidance to real estate professionals, but also reflected how various industries essentially isolated Black communities in areas with lower investments than their white counterparts. The systematic disinvestment in communities of color in Chicago prevented them from building wealth through homeownership and accessing products and services to thrive.

The broad practice of redlining shaped how Chicago formed, robbing Black and Brown communities of the opportunity to build intergenerational wealth through homeownership. Its legacy lives on, though not exactly how we would expect. Formerly redlined communities have changed considerably in Chicago, in ways that are important to understand when creating policies to address the racial wealth gap today. Democratic presidential candidates Elizabeth Warren and Pete Buttigieg have plans to address the impacts of redlining, and both offer benefits to residents currently living in formerly redlined areas. In Chicago, these plans could miss the people most impacted by redlining practices.

Redlining

Redlining Map

Redlined Communities in 1940 and Lending Patterns Today

There’s a popular adage in the advocacy and data viz communities in Chicago - “All maps of the city look the same.” The saying unfortunately refers to the fact that many socioeconomic indicators related to poverty, crime, and police misconduct are clustered in the South and West parts of Chicago, where Black and Brown communities live today. However, the HOLC map of Chicago created in 1940 looks quite different.

In [0]:
#clean redlining data
chi_1940 = get_gpd_df("data/raw/ILChicago1940.geojson")
chi_1940["holc_grade_n"] = np.where(chi_1940["holc_grade"] == "A", 1,
                                    np.where(chi_1940["holc_grade"] == "B", 2,
                                      np.where(chi_1940["holc_grade"] == "C", 3,
                                    4)))
In [0]:
#merge redlining data with census data
census_to_counts_map['point_centroid'] = census_to_counts_map['geometry'].centroid
census_centroids = census_to_counts_map.drop(columns="geometry", axis=1).rename({"point_centroid" : "geometry"}, axis=1)
census_w_holc = gpd.sjoin(census_centroids, chi_1940, how="left", op='intersects')
census_w_holc["holc_grade_n"] = census_w_holc["holc_grade_n"].fillna(value=0)
census_w_holc["holc_grade"] = np.where(census_w_holc["holc_grade_n"] == 0, "NA", census_w_holc["holc_grade"])
In [0]:
# Tracts without any 2017 HMDA record count as zero loans.
census_w_holc["loans_2017"] = census_w_holc["loans_2017"].fillna(value=0)
In [0]:
#find maximum latitude of income map
census_to_counts_map["geometry"].total_bounds
chi_1940['latitude_centroids'] = chi_1940["geometry"].centroid.y
chi_1940_map = chi_1940[chi_1940['latitude_centroids'] < 42.023924]
chi_1940_map = chi_1940_map[chi_1940_map['latitude_centroids'] > 41.644286]
In [0]:
# Expand the single-letter grades into the descriptive labels shown in the legend.
holc_grade_labels = {
    "A": "A: Most Desirable",
    "B": "B: Still Desirable",
    "C": "C: Declining",
    "D": "D: Hazardous",
}
for grade_letter, grade_label in holc_grade_labels.items():
    chi_1940_map.loc[chi_1940_map['holc_grade'] == grade_letter, 'holc_grade'] = grade_label
In [56]:
# Inline GeoJSON features for the HOLC polygons (labelled grades, latitude-filtered).
redline_map_json = json.loads(chi_1940_map.to_json())
redline_data = alt.Data(values=redline_map_json['features'])

# Light-gray tract outline drawn underneath the HOLC polygons.
census_base = alt.Chart(count_data).mark_geoshape(
        fill = 'lightgray',
        stroke='lightgray',
        strokeWidth=1
    ).encode(
    )



# HOLC polygons coloured by grade; the explicit scheme overrides the theme's ordinal range.
redlining_chloro = alt.Chart(redline_data).mark_geoshape(opacity=.9
    ).encode(
        alt.Color('properties.holc_grade',
                  type='ordinal',
                  scale=alt.Scale(scheme="yelloworangered"),
                  legend =alt.Legend(title = "HOLC Grade"))).properties(
                      title={"text":["HOLC Grades in 1940 Chicago"], "fontSize" : 12},
                      height = 500,
                      width = 500)

# Left panel: gray base layer with the HOLC layer on top.
redlining_map = census_base + redlining_chloro

# Right panel: tracts coloured by 2017 loans per 100 people.
loan_chloro = alt.Chart(count_data).mark_geoshape(
    ).encode(
        alt.Color('properties.loans_per_100_ppl_2017',
                  type='quantitative',
                  scale=alt.Scale(scheme="tealblues"),
                  legend =alt.Legend(title = "Loans per 100 people", clipHeight=4))).properties(
                      title= {"text":["Loans Per 100 People in 2017"], "fontSize" : 12},
                      height = 500, width = 500)

# Side-by-side figure; this is the cell's last expression, so it is what gets displayed.
alt.hconcat(redlining_map, loan_chloro).properties(
                      title={"text" : "Lending Patterns have Changed Tremendously over the Last 80 years",
                             'subtitle' : ["Many areas redlined in the past are now growing at the fastest rates in Chicago", "Source: HMDA 2017 Data"],
                             'subtitleFont': 'Gotham',
                             'subtitleFontSize' : 14}).configure_legend(
    #strokeColor='gray',
    #fillColor='#EEEEEE',
    padding=6,
    cornerRadius=10,
    orient='bottom',
    columns = 0
)



## I want this map to look like the actual HOLC map!
# There are a number of ways I could show this first map
  # fix the legends
  # put the maps closer to each other
  # make the redlining map more transparent
Out[56]:

Note: The HOLC map has slightly different borders than today’s map because it did not grade every area of the city, and graded areas that today fall beyond the city’s border.

The map on the left is the HOLC map of Chicago in 1940. The HOLC map shows most parts at the center of the city and spanning North East and South were graded “D”, “Hazardous.” Communities farther from the center, in the North and South of the Loop were rated “C,” “definitely declining.” At the time, suburbs farther from the city center were considered more desirable, because of the low population of people of color, immigrants, and renters. The HOLC map also downgraded areas based on congestion, age of buildings, and density of businesses.

The map on the right shows where loans are concentrated today. The map looks quite different - the loans are most concentrated in the loop and North west of the loop. These areas were rated either “C” or “D” by the HOLC in the 1940s. Fewer loans originate West of the loop and South of the Loop. Redlined neighborhoods have fared differently around Chicago over the past 80 years. This becomes more apparent when comparing the distribution of loans in census tracts by how the tract was rated by the HOLC in 1940.

Redlining and Income Raincloud Plot

In [0]:
# NOTE: this repeats the earlier centroid join and rebuilds census_w_holc from scratch,
# so the fills applied to the previous census_w_holc (holc_grade_n -> 0, loans_2017 -> 0)
# are not carried over to the frame produced here.
census_to_counts_map['point_centroid'] = census_to_counts_map['geometry'].centroid
census_centroids = census_to_counts_map.drop(columns="geometry", axis=1).rename({"point_centroid" : "geometry"}, axis=1)
census_w_holc = gpd.sjoin(census_centroids, chi_1940, how="left", op='intersects')
In [0]:
# Tracts with no matching HOLC polygon get the string label 'NA' (holc_grade_n is left untouched).
census_w_holc.loc[census_w_holc["holc_grade"].isna(), 'holc_grade'] = 'NA'
In [0]:
#clean neighborhood data
comm_area_demographics = pd.read_csv("data/raw/Census-Data-by-Chicago-Community-Area-2018.csv",
                                     usecols=[0,1,2,8,9,10,11,12])
comm_area_demographics = comm_area_demographics[comm_area_demographics['Percent Non-Hispanic Other or Multiple Races'].notnull()]
comm_area_demographics = comm_area_demographics.rename({'Community \nNumber': 'commarea_n',
                                                        'Percent Non-Hispanic Asian ': 'Percent Non-Hispanic Asian'}, axis='columns')
comm_area_demographics['commarea_n'] = comm_area_demographics['commarea_n'].astype('str')
comm_area_demographics['commarea_n'].replace('\.0$', '', regex=True, inplace=True)

for cols in ["Total Population", "Percent Hispanic","Percent Non-Hispanic Black", "Percent Non-Hispanic White", "Percent Non-Hispanic Asian", "Percent Non-Hispanic Other or Multiple Races" ]:
  comm_area_demographics[cols].replace('\,', '', regex=True, inplace=True)
  comm_area_demographics[cols].replace('\%', '', regex=True, inplace=True)
  comm_area_demographics[cols] = comm_area_demographics[cols].astype('int')

for cols in ["Percent Hispanic","Percent Non-Hispanic Black", "Percent Non-Hispanic White", "Percent Non-Hispanic Asian", "Percent Non-Hispanic Other or Multiple Races" ]:
  comm_area_demographics[cols] = comm_area_demographics[cols]/100
  num = re.sub("Percent ", "pop_", cols)
  comm_area_demographics[num] = comm_area_demographics['Total Population']*(comm_area_demographics[cols])

comm_area_demographics['percent_poc'] = comm_area_demographics['Percent Hispanic'] + comm_area_demographics['Percent Non-Hispanic Black'] + \
  comm_area_demographics['Percent Non-Hispanic Asian'] + comm_area_demographics['Percent Non-Hispanic Other or Multiple Races']

#aggregate by commuity area
comm_area_counts = census_to_counts_map.groupby('commarea_n').agg({'loans_2007':'sum', 'loans_2017' : 'sum',
                                                               'pop_2007':'sum', 'pop_2017':'sum'}).reset_index()

#merge with geography and demographic info
comm_area_info = comm_area_demographics.merge(comm_area_counts, on='commarea_n', how='left')

comm_area_merged = comm_area_gpd.merge(comm_area_info, left_on='area_num_1', right_on = 'commarea_n', how='inner', indicator=True)
comm_area_merged['centroid_lon'] = comm_area_merged['geometry'].centroid.x
comm_area_merged['centroid_lat'] = comm_area_merged['geometry'].centroid.y
comm_area_merged["loans_per_hunderd_2017"] = (comm_area_merged['loans_2017']/comm_area_merged['Total Population'])*100

#prepare for mapping
comm_json = json.loads(comm_area_merged.to_json())
comm_data = alt.Data(values=comm_json['features'])
In [0]:
# Community name, join key and three race-share columns to attach to each tract.
comm_area_name = comm_area_merged[['community', "commarea_n", "Percent Non-Hispanic Black", "Percent Hispanic", "Percent Non-Hispanic White"]]
In [0]:
# Attach the community-area name and race shares to every tract (left join on commarea_n).
census_w_holc = census_w_holc.merge(comm_area_name, on="commarea_n", how='left')
In [0]:
# Only tracts with a positive HOLC grade number take part in the neighborhood summary.
graded_tracts = census_w_holc[census_w_holc['holc_grade_n'] > 0]

# Sums for counts, means for the grade and the race shares.
neighborhood_aggregations = {
    'loans_2017' : 'sum',
    'loans_2007': 'sum',
    'holc_grade_n':'mean',
    'pop_2017' : 'sum',
    'Percent Hispanic':'mean',
    'Percent Non-Hispanic Black': 'mean',
    'Percent Non-Hispanic White':'mean',
}
neighborhood_avg_holc = graded_tracts.groupby(['community', 'commarea_n']).agg(neighborhood_aggregations)
In [63]:
#sinplot()
sns.set_style({'text.color': '.01'})

# Colour per HOLC grade label; 'NA' marks tracts with no HOLC polygon.
my_pal = {"NA": "#B5B3A3", "D": "#ca0b22", "C":"#fc7335", "B": "#febd59", "A":"#fed16f"}

f, ax = plt.subplots(figsize=(12, 10))
# NOTE: `pal` is assigned here but not referenced again in this cell (my_pal is used instead).
dy="holc_grade"; dx="loans_per_100_ppl_2017"; ort="h"; pal = sns.color_palette(n_colors=4)
df = census_w_holc
# Three layers over the same x/y: half violin, jittered strip, then an unfilled box plot.
ax=pt.half_violinplot( x = dx, y = dy, data = df, palette = my_pal, bw = .2, cut = 0.,
                      scale = "count", width = .8, inner = None, orient = ort)
ax=sns.stripplot( x = dx, y = dy, data = df, palette = my_pal, edgecolor = "white",
                 size = 3, jitter = 1, zorder = 0, orient = ort)
ax=sns.boxplot( x = dx, y = dy, data = df, color = "black", width = .15, zorder = 10,\
            showcaps = True, boxprops = {'facecolor':'none', "zorder":10},\
            showfliers=False, whiskerprops = {'linewidth':2, "zorder":10},\
               saturation = 1, orient = ort)

#plt.title("Areas redlined in 1940 recieve a wide distribution of loans today")
plt.xlabel("Loans per 100 People")
plt.ylabel("HOLC Grade")
# suptitle carries the headline; the axes title carries the subtitle and source line.
plt.suptitle('Redlined Areas Receive a Wide Distribution of Loans Today', fontsize='medium')
ax.set_title("Most of Chicago was rated Hazardous (D) or Declining (C) in 1940 - Source: HMDA 2017 Data", fontsize='x-small')

plt.show()

Each point above represents a census tract. The chart also shows that only four census tracts were rated “Best” in the 1940s. The HOLC rated Chicago while the city was still suffering from the Great Depression, so the majority of the city was rated “C” or “D”. Today these tracts show a wide distribution of loans originated per capita, with some very high outliers that receive the most loans per capita in the whole city.

These census tracts are in the Near South Side and West Town neighborhoods.

Case Study: Washington Park and Near South Side

In [0]:
# Race/ethnicity population columns (2010 and 2017 ACS) to total for Washington Park.
wp_pop_cols = ['white_pop_2010', 'black_pop_2010', 'AIAN_pop_2010',
               'asian_pop_2010', 'hispanic_pop_2010', 'white_pop_2017', 'black_pop_2017',
               'AIAN_pop_2017', 'asian_pop_2017', 'hispanic_pop_2017']

# Select the numeric columns BEFORE summing so geometry and text columns are never
# aggregated. This also removes the need for the constant 'ind' grouping key
# that was previously written onto a filtered slice.
wp_totals = census_w_holc.loc[census_w_holc['commarea_n'] == '40', wp_pop_cols].sum()
# One-row frame of totals, kept under the original name.
washington_park = wp_totals.to_frame().T

# Long format: one row per (race/ethnicity, year).
washington_park_u = wp_totals.rename_axis('level_0').reset_index(name='population')
# Column names look like '<group>_pop_<year>': the last 4 characters are the year and
# everything before the 9-character '_pop_YYYY' suffix is the group. The year is cast
# to int so it matches the integer years in wp_2000 instead of producing a mixed
# str/int column.
washington_park_u['year'] = washington_park_u['level_0'].str[-4:].astype(int)
washington_park_u['race_ethnicity'] = washington_park_u['level_0'].str[0:-9].replace({
    'white': "White",
    'black': "Black or African American",
    'hispanic': "Latino",
    'asian': "Asian",
    # 'AIAN' already has its final label, so it needs no entry here.
})
washington_park_u = washington_park_u[["population", "year", "race_ethnicity"]]

# Hard-coded 2000 shares for Washington Park.
# NOTE(review): 14146 is presumably the 2000 total population - confirm the source.
wp_2000 = {"year": [2000, 2000,2000], "race_ethnicity" : ['Black or African American', 'Latino', 'White'], "perc" : [.975, .009, .005]}
wp_2000 = pd.DataFrame.from_dict(wp_2000)

wp_2000['population'] = wp_2000['perc']*14146
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
washington_park_all = pd.concat([washington_park_u, wp_2000])
In [0]:
# NOTE(review): this filters on 'commarea' while the Washington Park cell filters on
# 'commarea_n' - confirm both columns exist and hold the same values.
near_south = census_w_holc[census_w_holc['commarea'] == '33']

# Column totals for the Near South Side tracts. The chart below is built from the
# hard-coded values in `d`, not from this frame.
near_south_pivot = near_south[['tot_pop_2010', 'white_pop_2010', 'black_pop_2010', 'AIAN_pop_2010',
       'asian_pop_2010', 'hispanic_pop_2010', 'tot_pop_2017', 'white_pop_2017', 'black_pop_2017',
       'AIAN_pop_2017', 'asian_pop_2017', 'hispanic_pop_2017', 'loans_2007',
       'loans_2017']].sum().to_frame()

# Hard-coded Near South Side populations by group for 2000, 2010 and 2017.
# NOTE(review): the 2000 'Asian' and 'AIAN' entries are both 469 - verify against the source table.
d = {'race_ethnicity': ['White', 'Black or African American', 'Asian', 'Latino', 'AIAN', 'White', 'Black or African American', 'Asian', 'Latino', 'AIAN', 'White', 'Black or African American', 'Asian', 'Latino', 'AIAN'],
     'year': [2000, 2000, 2000, 2000, 2000,2010, 2010, 2010, 2010, 2010, 2017, 2017, 2017, 2017, 2017],
     'population': [5320, 4166, 469, 3858, 469, 7786.0, 6343.0, 2425.0, 938.0, 54.0, 11063.0, 5537.0, 5098.0, 1247.0, 22.0]}

south_side_pop = pd.DataFrame.from_dict(d)

# HMDA records for the tracts that make up each case-study neighborhood.
race_south_side = hmda_all[hmda_all['census_tract_number'].isin(["8140", "3302", "3301"])]
race_washington = hmda_all[hmda_all['census_tract_number'].isin(["8345", "8361", "4003", "4004", "4005",  "4008"])]
washington_park_all['neighborhood'] = "Washington Park"
south_side_pop['neighborhood'] = "Near South Side"
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
south_and_washington = pd.concat([south_side_pop, washington_park_all])
south_and_washington.loc[south_and_washington['race_ethnicity'] == 'AIAN', 'race_ethnicity'] = " American Indian or Alaskan"
In [66]:
# Stacked bars of population by year, coloured by race/ethnicity, faceted by neighborhood.
south_side_pop_chart = alt.Chart(south_and_washington).mark_bar(opacity = .9).encode(
    x= alt.X("year:N", title="Year"),
    y=alt.Y("population:Q", title= "Population"),
    color= alt.Color("race_ethnicity:N", title= "Race/Ethnicity")
).properties(
    width=250,
    height=500).facet(
    'neighborhood:N', title=None, bounds="flush", spacing=50).properties(
   title={"text" : "Population is Growing in Outlier Neighborhoods, while Shrinking in Others",
                             'subtitle' : ["Near South Side has grown overall, though the Latinx population has declined",
                                           "Washington Park has experienced an exodus of Black residents, leading to overall decline in population",
                                           "Source: Census 2000, ACS 2010 and 2017 Estimates"],
          "subtitleFontSize":14
                             }).configure_legend(
    padding=1,
    cornerRadius=1,
    orient='right',
    columns = 1,
).configure_axisY(grid=False).configure_axisX(labelFlush=True)

# Last expression of the cell, so the chart is displayed.
south_side_pop_chart
Out[66]:

The highest number of loans per capita originated in the Near South Side neighborhood in 2017. The neighborhood has grown by almost 10,000 people since 2000. The White, Asian and Black populations all grew. The Latino population, however, decreased by almost 3,000 over the last 17 years. In contrast, Washington Park had one of the lowest rates of loans per capita in the city. The neighborhood’s population has dropped by almost 3,000, almost entirely due to the decrease in Black residents. The number of loans originated in Washington Park was already low, and decreased even more than in Near South Side since 2007. In 2017 the loans originated in Washington Park went almost exclusively to Black applicants.

The difference in who received loans in Washington Park versus Near South Side begs the question, is Chicago becoming more or less integrated? In Near South Side the Latino demographic fell, while the Asian and White demographic groups increased, and the Black demographic group also grew slightly. Washington Park was almost 100% Black in 2000, and has lost Black residents since then. The rate of integration, according to loans in 2007 and 2017 has also fallen slightly - almost all the loans in 2017 went to Black applicants.

Throughout Chicago, loan originations are concentrated in the North and North West neighborhoods. There is a negative correlation between the percent people of color in a neighborhood and the number of loans originated, though the relationship is not straightforward in different geographies.

Where are Loans Originated Today?

In [0]:
#clean neighborhood data
comm_area_demographics = pd.read_csv("data/raw/Census-Data-by-Chicago-Community-Area-2018.csv",
                                     usecols=[0,1,2,8,9,10,11,12])
# Drop rows that have no demographic breakdown. .copy() makes the result an
# independent frame, so the column assignments below never write into a slice
# of the raw read (avoids SettingWithCopy warnings).
comm_area_demographics = comm_area_demographics[
    comm_area_demographics['Percent Non-Hispanic Other or Multiple Races'].notnull()].copy()
comm_area_demographics = comm_area_demographics.rename({'Community \nNumber': 'commarea_n',
                                                        'Percent Non-Hispanic Asian ': 'Percent Non-Hispanic Asian'}, axis='columns')
# The community number is the merge key below: store it as a string and strip
# any trailing ".0". The cleaned Series is assigned back explicitly because
# `df[col].replace(..., inplace=True)` mutates a temporary column selection and
# is not guaranteed to update the frame (it does not under pandas Copy-on-Write).
comm_area_demographics['commarea_n'] = (comm_area_demographics['commarea_n']
                                        .astype('str')
                                        .replace(r'\.0$', '', regex=True))

for cols in ["Total Population", "Percent Hispanic","Percent Non-Hispanic Black", "Percent Non-Hispanic White", "Percent Non-Hispanic Asian", "Percent Non-Hispanic Other or Multiple Races" ]:
  # Remove thousands separators and percent signs, then cast to integers.
  comm_area_demographics[cols] = (comm_area_demographics[cols]
                                  .replace(r'[,%]', '', regex=True)
                                  .astype('int'))

for cols in ["Percent Hispanic","Percent Non-Hispanic Black", "Percent Non-Hispanic White", "Percent Non-Hispanic Asian", "Percent Non-Hispanic Other or Multiple Races" ]:
  # Convert whole-number percentages to fractions, then derive a head count
  # column named "pop_<group>" from the total population.
  comm_area_demographics[cols] = comm_area_demographics[cols]/100
  num = re.sub("Percent ", "pop_", cols)
  comm_area_demographics[num] = comm_area_demographics['Total Population']*(comm_area_demographics[cols])

# Share of people of color = every group except Non-Hispanic White.
comm_area_demographics['percent_poc'] = comm_area_demographics['Percent Hispanic'] + comm_area_demographics['Percent Non-Hispanic Black'] + \
  comm_area_demographics['Percent Non-Hispanic Asian'] + comm_area_demographics['Percent Non-Hispanic Other or Multiple Races']

#aggregate by community area
comm_area_counts = census_to_counts_map.groupby('commarea_n').agg({'loans_2007':'sum', 'loans_2017' : 'sum',
                                                               'pop_2007':'sum', 'pop_2017':'sum'}).reset_index()

#merge with geography and demographic info
comm_area_info = comm_area_demographics.merge(comm_area_counts, on='commarea_n', how='left')

comm_area_merged = comm_area_gpd.merge(comm_area_info, left_on='area_num_1', right_on = 'commarea_n', how='inner', indicator=True)
# Centroids give the anchor point for the loan-count bubbles drawn on the map.
comm_area_merged['centroid_lon'] = comm_area_merged['geometry'].centroid.x
comm_area_merged['centroid_lat'] = comm_area_merged['geometry'].centroid.y
comm_area_merged["loans_per_hunderd_2017"] = (comm_area_merged['loans_2017']/comm_area_merged['Total Population'])*100

#prepare for mapping
# Altair receives the GeoJSON features directly; attribute columns are then
# addressed as "properties.<column>" in the chart encodings.
comm_json = json.loads(comm_area_merged.to_json())
comm_data = alt.Data(values=comm_json['features'])
In [68]:
# Every layer of the map shares the same square frame.
map_frame = dict(width=600, height=600)

# Layer 1: community-area outlines.
base = (
    alt.Chart(comm_data)
    .mark_geoshape(stroke='black', strokeWidth=1)
    .encode()
    .properties(**map_frame)
)

# Layer 2: choropleth shaded by the share of people of color in each area.
poc_color = alt.Color(
    "properties.percent_poc",
    type='quantitative',
    scale=alt.Scale(scheme="tealblues"),
    title="Percent People of Color",
)
chloro_perc_poc = (
    alt.Chart(comm_data)
    .mark_geoshape()
    .encode(poc_color)
    .properties(**map_frame)
)

# Layer 3: one black bubble per community area, placed at its centroid and
# sized by the number of 2017 loans.
points = (
    alt.Chart(comm_data)
    .mark_circle()
    .encode(
        longitude='properties.centroid_lon:Q',
        latitude='properties.centroid_lat:Q',
        size=alt.Size("properties.loans_2017:Q", title="Number of Loans"),
        color=alt.value('black'),
        tooltip=['properties.community:N', 'properties.loans_2017:Q'],
    )
    .properties(**map_frame)
)

map_title = {
    "text": "Loans Are Concentrated in the North and West of the City",
    "subtitle": [
        "Areas with a very high percentage people of color recieve fewer loans,",
        "But the relationship between loan locations and neighborhood demographics in more mixed neighborhoods is less clear",
        "HMDA 2017 Data, ACS 2017 Estimates",
    ],
}

poc_num_loans = alt.layer(base, chloro_perc_poc, points).properties(title=map_title)
poc_num_loans
Out[68]:

A relationship between the number of loans and the percent people of color exists, but there also appear to be areas in the far north and far west of the city where the relationship is less clear.

To elucidate the types of neighborhoods receiving the most loans, I bin census tracts by median income and percent Black and Latino residents. This grouping suggests that loan originations are concentrated in two types of areas - upper-middle income areas with a medium to high concentration of White residents, and low-income areas with almost only Black and Latino residents.

Loans by % minority and median income of tract

In [0]:
# Combined Black + Hispanic head count per tract, then that count as a share
# of the tract's total 2017 population.
census_w_holc['black_latino'] = census_w_holc['black_pop_2017'].add(census_w_holc['hispanic_pop_2017'])
census_w_holc['perc_black_latino'] = census_w_holc['black_latino'].div(census_w_holc['tot_pop_2017'])
In [70]:
# Both axes are binned (at most 10 bins each); every bubble therefore stands
# for one income-by-demographics cell of census tracts.
income_axis = alt.X(
    'median_income_tract_2017:Q',
    bin=alt.Bin(maxbins=10),
    title="Median Income Of Tract ($)",
)
share_axis = alt.Y(
    'perc_black_latino:Q',
    bin=alt.Bin(maxbins=10),
    axis=alt.Axis(format='%'),
    title="Percent Black and Latino Residents of Census Tract",
)

# The summed 2017 loan count of a cell drives both bubble size and colour.
loan_size = alt.Size("sum(loans_2017):Q")
loan_color = alt.Color(
    "sum(loans_2017):Q",
    scale=alt.Scale(scheme='tealblues'),
    title="2017 Home Loans",
)

heat_title = {
    "text": ["Loan Originations by Tract Median Income and Percent Black and Latino Residents"],
    "subtitle": [
        "Loan originations are Concentrated in Upper-Middle Income mixed demographic neighborhoods",
        "and low-income Black and Latino neighborhoods",
        "Source: 2017 HMDA Data",
    ],
    'subtitleFontSize': 14,
}

poc_loan_heat = (
    alt.Chart(census_w_holc)
    .mark_circle()
    .encode(x=income_axis, y=share_axis, size=loan_size, color=loan_color)
    .properties(title=heat_title, width=500, height=500)
    .configure_axisY(grid=True)
)

poc_loan_heat
Out[70]:

The most concentrated bin above contains low-income census tracts with almost exclusively Black and Latino residents. There was also a concentration of loans in upper-middle income census tracts with 0-30% Black and Latino residents. This plot reveals most obviously that banks are no longer completely redlining Black and Latino neighborhoods, unlike in the mid 20th century. However, we cannot assess the quality of such loans because the Consumer Financial Protection Bureau does not release interest rates, to the detriment of policy analysts.

The plot also raises the question: how are lending patterns impacting segregation in Chicago neighborhoods? Redlining practices were meant to isolate Black communities in specific neighborhoods. The maps above show that the locations of Black and Brown neighborhoods have changed, but lending practices that isolate such communities in specific neighborhoods may have remained the same.

Where do people move?

In [0]:
# Tract-level 2017 population shares.
# perc_poc_2017 used to be an exact copy of perc_black (Black population only),
# which contradicts its name. It now sums the Black, Hispanic and Asian counts.
# NOTE(review): only these three group counts are visible in this notebook
# section; if the frame also carries an "other / multiple races" count, add it
# here so the definition matches the community-area percent_poc.
census_w_holc['perc_poc_2017'] = (census_w_holc['black_pop_2017']
                                  + census_w_holc['hispanic_pop_2017']
                                  + census_w_holc['asian_pop_2017'])/census_w_holc['tot_pop_2017']
census_w_holc['perc_black'] = census_w_holc['black_pop_2017']/census_w_holc['tot_pop_2017']
census_w_holc['perc_hispanic'] = census_w_holc['hispanic_pop_2017']/census_w_holc['tot_pop_2017']
census_w_holc['perc_asian'] = census_w_holc['asian_pop_2017']/census_w_holc['tot_pop_2017']
In [0]:
def create_binned_df_for_ridgeplot(demographic_x_variable):
  """Build a ridgeline chart of 2017 HMDA records by tract demographics.

  The 2017 rows of the notebook-level `hmda_all` frame are joined to the tract
  shares in `census_w_holc`, cut into 5-percentage-point bins of
  `demographic_x_variable`, and counted per bin and `race_ethnicity`. Each
  count is divided by that group's total number of records, so every ridge
  shows the within-group distribution.

  Args:
    demographic_x_variable: name of a tract share column selected from
      `census_w_holc` ('perc_black', 'perc_hispanic', 'perc_asian' or
      'perc_black_latino').

  Returns:
    An Altair chart faceted into one row per applicant group (Asian, Black,
    Latino, White).

  Raises:
    KeyError: if `demographic_x_variable` is not one of the selected columns.
  """
  census_poc = census_w_holc[['census_tract_number', 'commarea', 'perc_black_latino', 'perc_black', 'perc_hispanic', 'perc_asian']]
  hmda_2017_all = hmda_all[hmda_all['as_of_year'] == 2017]
  hmda_2017_all = hmda_2017_all.merge(census_poc, on='census_tract_number', how="inner")

  # Per-group mean of the tract share; merged onto the plot data below and used
  # as a grouping field by the impute transform.
  mean_by_race = hmda_2017_all.groupby('race_ethnicity').agg({demographic_x_variable : 'mean'}).reset_index()

  bins = np.arange(0, 1.05, .05)
  hmda_2017_all['bin_series'] = pd.cut(hmda_2017_all[demographic_x_variable], bins=bins)

  # observed=False keeps empty bins in the result (the historical default for
  # categorical group keys), so every group has a row for every bin.
  binned = hmda_2017_all.groupby(['bin_series', 'race_ethnicity'], observed=False).agg(
      {'respondent_id':'count'}).reset_index()

  # Records per group, the denominator of the within-group share.
  total = hmda_2017_all.groupby(['race_ethnicity']).agg({'respondent_id':'count'}).rename(
      {'respondent_id':'total'}, axis='columns').reset_index()
  binned = binned.merge(total, on='race_ethnicity', how='left')
  binned['value'] = binned['respondent_id']/binned['total']

  # Read the bin edges from the Interval objects themselves rather than slicing
  # their string form, which only works while every label has the same width.
  binned['bin_max'] = [interval.right for interval in binned['bin_series']]
  binned['bin_min'] = [interval.left for interval in binned['bin_series']]

  binned_plot = binned[['race_ethnicity', 'value', 'bin_max', 'bin_min']]
  # Accept both spellings of the Black label: the race-cleaning cell rewrites
  # "Black or African American" to "Black", and filtering on the long form alone
  # would silently drop those applicants once that cell has run.
  binned_plot = binned_plot[binned_plot['race_ethnicity'].isin(
      ["Asian", "Black or African American", "Black", "White", "Latino"])]
  binned_plot = binned_plot.merge(mean_by_race, on='race_ethnicity', how='left')
  binned_plot.loc[binned_plot["race_ethnicity"]=="Black or African American", 'race_ethnicity'] = "Black"

  step = 35
  overlap = .8

  # Axis title per supported variable. Unknown names fall back to the variable
  # name itself instead of leaving `title` unbound.
  axis_titles = {
      'perc_black': "Percent Black Residents in Census Tract where Applicant Moved to",
      'perc_hispanic': "Percent Latino Residents in Census Tract where Applicant Moved to",
      'perc_asian': "Percent Asian Residents in Census Tract where Applicant Moved to",
      'perc_black_latino': "Percent Black and Latino Residents in Census Tract where Applicant Moved to",
  }
  title = axis_titles.get(demographic_x_variable, demographic_x_variable)

  chart = alt.Chart(binned_plot, height=step).transform_impute(
      impute='value', groupby=['race_ethnicity', demographic_x_variable], key='bin_min', value=0
  ).mark_area(
      interpolate='monotone',
      fillOpacity=0.8,
      stroke='lightgray',
      strokeWidth=0.5
  ).encode(
      alt.X('bin_min:Q', bin='binned', title=title, axis=alt.Axis(format='%')),
      alt.Y(
          'value:Q',
          # A range reaching above the row (negative pixels) makes ridges overlap.
          scale=alt.Scale(range=[step, -step * overlap]),
          axis=None
  ), alt.Fill(
          'race_ethnicity:N',
          legend=None)
      ).properties(width = 500, bounds="flush").facet(
      row=alt.Row(
          'race_ethnicity:N', title=None,
          header=alt.Header(labelAngle=0, labelAlign='left'))
  ).properties(bounds='flush'
  )

  return chart
In [155]:
# One ridgeline panel per tract demographic (share Black, share Latino),
# stacked vertically under a shared title.
black_res, latino_res = (
    create_binned_df_for_ridgeplot(share_column)
    for share_column in ('perc_black', 'perc_hispanic')
)

ridge_title = {
    "text": [
        "Demographics of the Communities People Move to by Race of Applicant",
        "Black Home Buyers almost exclusively move to Black Communities",
    ],
    "subtitle": [
        'White and Asians move to mostly White communities, and Latinos move to White and Latino communities',
        "Source: 2017 HMDA Data, ACS 2017 Estimates",
    ],
}

alt.vconcat(black_res, latino_res).properties(title=ridge_title)
Out[155]:

The plot above shows that Black home buyers bought almost all of their homes in communities with more than 80% Black residents. Asians and Latinos bought most of their homes in areas with less than 10% Black residents. Latinos don't buy homes in Black neighborhoods, but also don't move specifically to majority Latino neighborhoods.

White applicants bought the most homes in census tracts with less than 10% Black or Latino residents, but also bought homes in areas with between 20% and 60% Latino residents. This supports the observed phenomenon in Chicago that Latino neighborhoods become integrated and gentrified faster than Black neighborhoods.

Very few White loan applicants bought a home in a census tract with more than 70% Black or Latino residents. Asians bought homes in areas with a small percentage of Black and Latino residents, but also moved to areas with more Latinos than Blacks.

This plot shows that the intention of redlining - to isolate Black communities in certain segregated neighborhoods - continues today.

Additionally, Blacks are still receiving far fewer loans than their White counterparts, and the gap has only widened since 2007.

Loan Demographics

In [0]:
#clean race data
# Applicants flagged as Hispanic or Latino are labelled 'Latino' regardless of
# their race; everyone else keeps their first listed race.
hmda_all['race_ethnicity'] = np.where(hmda_all['applicant_ethnicity_name'] == 'Hispanic or Latino', 'Latino', hmda_all['applicant_race_name_1'])

# Collapse the "unknown" categories into 'NA' and shorten the long race labels.
race_labels = {
    'Information not provided by applicant in mail, Internet, or telephone application': 'NA',
    'Not applicable': 'NA',
    'Black or African American': 'Black',
    'Native Hawaiian or Other Pacific Islander': 'NHPI',
    'American Indian or Alaska Native': 'AIAN',
}
hmda_all['race_ethnicity'] = hmda_all['race_ethnicity'].replace(race_labels)

#clean income data
# Bucket tract_to_msamd_income: <50 low, <80 moderate, <120 middle,
# <1000 upper. The first matching condition wins; anything else, including
# missing values, becomes 'NA'.
tract_income = hmda_all['tract_to_msamd_income']
hmda_all['tract_income_bin'] = np.select(
    [
        (tract_income < 50).values,
        (tract_income < 80).values,
        (tract_income < 120).values,
        (tract_income < 1000).values,
    ],
    ['low', 'moderate', 'middle', 'upper'],
    default='NA',
)


# aggregate by census tract because altair can't handle my data...
In [0]:
# Step 1: collapse to one row per (year, group, tract): number of records and
# the mean of the `population` field reported on them.
tract_level = (
    hmda_all
    .groupby(['as_of_year', 'race_ethnicity', 'census_tract_number'])
    .agg({'respondent_id': 'count', 'population': 'mean'})
    .reset_index()
)

# Step 2: add the tract rows up to one row per (year, group).
race_by_year = (
    tract_level
    .groupby(['as_of_year', 'race_ethnicity'])
    .agg({'respondent_id': 'sum', 'population': 'sum'})
    .reset_index()
    .rename({'as_of_year': 'year'}, axis="columns")
)
race_by_year['loans_per_100'] = 100*race_by_year['respondent_id']/race_by_year['population']

# Fixed display rank for the four largest groups; every other label gets 5.
group = race_by_year['race_ethnicity']
race_by_year['pop_rank'] = np.select(
    [
        (group == "White").values,
        (group == "Black").values,
        (group == "Latino").values,
        (group == "Asian").values,
    ],
    [1, 2, 3, 4],
    default=5,
)

# Yearly sum of `population` across groups, merged back so each row can be
# expressed as a share of its year's total.
population = (
    race_by_year
    .groupby("year")
    .agg({"population": "sum"})
    .reset_index()
    .rename({"population": "total"}, axis="columns")
)

race_by_year = race_by_year.merge(population, on="year", how="left")
race_by_year["perc_pop"] = race_by_year["population"]/race_by_year["total"]
In [148]:
# Loans per 100 people, by year and applicant group.
# The y axis no longer uses format='%': loans_per_100 is already multiplied by
# 100 upstream, and the '%' number format multiplies by 100 again (a value of
# 1.5 was labelled "150%"). Plain numbers match the "Loans Per 100 People" title.
# NOTE(review): `year` is encoded as ordinal while the scale sets a two-value
# domain and `clamp`, which is a continuous-scale option - confirm this x scale
# behaves as intended.
loans_race_ethnicity_year = alt.Chart(race_by_year).mark_line(size=8).encode(
    x= alt.X('year:O', title = "Year",   scale=alt.Scale(
            domain=(2000, 2017),
            clamp=True), axis = alt.Axis(labelFlush=True)),
    y=alt.Y('loans_per_100:Q', title = "Loans Per 100 People"),
    color= alt.Color('race_ethnicity:N', title="Race/Ethnicity")).properties(
        title = {"text" : "People of Color still recieve far fewer loans per capita than White Individuals",
        "subtitle": ["The lending gap has widened for Black individuals since 2007"],
        "subtitleFontSize" : 14},
             height = 400,
             width = 500)


# Overlay point markers on the lines so each yearly observation is visible.
alt.layer(loans_race_ethnicity_year + loans_race_ethnicity_year.mark_point()).configure_legend(
    padding=1,
    cornerRadius=1,
    orient='right',
    columns = 1,
    symbolType= "circle",
    symbolSize= 200
)
# TODO: order the legend entries (the original note here was left unfinished).
Out[148]:

Latinos also receive far fewer loans compared to Whites relative to their population, though the rate of lending to Latinos has increased since 2007. The rate of lending to Blacks has fallen significantly since 2007, reflecting the fact that African-Americans were more likely to experience foreclosure and were disproportionately targeted for “toxic” loans during the housing boom. The rate of lending to Whites is almost three times higher than that to African-Americans relative to their population.

In conclusion, the geography of lending since the mid 20th century has largely changed in Chicago, but the impact of redlining seems to continue today. Far fewer Black individuals receive loans, and almost all buy homes in highly segregated, low-income neighborhoods.